rm(list = ls())

library(ggplot2)
library(tidyverse)
library(reshape)
library(Matching)
library(ggrepel)
library(RVAideMemoire)
library(dgof)
library(mltools)
library(mediation)
library(effectsize)
library(lsr)
library(pwr)
options(warn=-1)


############ load the data ##############
df = read.csv('vaccine_treatment_survey.csv')


# thought we asked in our screen-survey vaccination status, we still asked this question this treatment survey 
# to make sure all the treated participants are vaccinated when being surveyed.
# filter out those whose responses are not "unvaccinated"
df1 = df[df$vax_status == 'Have not gotten the COVID-19 vaccine', ]

# check missing values in the data
colSums(is.na(df1))

# recode demographic variables 
df1$age = as.numeric(factor(df1$age,
                            levels = c("18 - 24", "25 - 34", "35 - 44", "45 - 54",
                                       "55 - 64", "65 - 74", "75 - 84", "85 or older")))
df1$edu = as.numeric(factor(df1$edu,
                           levels = c("Did not graduate from high school",
                                      "High school diploma or the equivalent (GED)",
                                      "Some college",
                                      "Associate's degree",
                                      "Bachelor's degree",
                                      "Master's degree",
                                      "Professional or doctorate degree")))

df1$interest_pol = as.numeric(factor(df1$interest_pol,
                              levels = c("Not at all interested",
                                         "Not very interested",
                                         "Somewhat interested",
                                         "Very interested",
                                         "Extremely interested")))

# recoding race and create new variable composed of 4 types 
df1$new_race = df1$race
df1$new_race[!df1$race %in% c("White",
                              "Black or African American",     
                              "Asian/Pacific Islander") ] = "Multi-racial or Other"
df1$new_race = factor(df1$new_race, 
                      levels = c("White",
                                 "Black or African American",    
                                 "Asian/Pacific Islander", 
                                 "Multi-racial or Other"))

# recoding the gender variable into male or non-male
df1$new_male = factor(ifelse(df1$gender == "Male", "Male", "non-Male"), 
                      levels = c("non-Male", "Male"))

df1$ideology = as.numeric(factor(df1$ideology, 
                      levels = c('Very liberal', 
                                 'Liberal', 
                                 'Slightly liberal', 
                                 'Moderate; middle of the road',
                                 'Slightly conservative',
                                 'Conservative',  
                                 'Very conservative')))

# recode perceived financial risk variable: The larger the value is, the higher financial risk the respondent perceives.
df1$new_risk_perfinan = 8 - as.numeric(factor(df1$risk_perfinan, 
                                      levels = c("Strongly agree", 
                                                 "Agree", 
                                                 "Somewhat agree",
                                                 "Neither agree nor disagree", 
                                                 "Somewhat disagree",
                                                 "Disagree", 
                                                 "Strongly disagree")))

df1$new_risk_econ = 8 - as.numeric(factor(df1$risk_econ, 
                                              levels = c("Strongly agree", 
                                                         "Agree", 
                                                         "Somewhat agree",
                                                         "Neither agree nor disagree", 
                                                         "Somewhat disagree",
                                                         "Disagree", 
                                                         "Strongly disagree")))

# based on treatment variable, create variable indicating two message frames
df1$frame = NA
df1$frame[df1$treatment %in% c("EconInterventionwithGist", "EconInterventionnogist")] = "econ"
df1$frame[df1$treatment %in% c("HealthInterventionwithGist", "HealthInterventionnogist")] = "health"
df1$frame = factor(df1$frame, levels = c('health', 'econ'))

# given we have bottom-line sentence manipulation, we create a gist variable 
df1$gist = NA
df1$gist[df1$treatment %in% c("EconInterventionwithGist", "HealthInterventionwithGist")] = "gist"
df1$gist[df1$treatment %in% c("HealthInterventionnogist", "EconInterventionnogist")] = "nogist"
df1$gist = factor(df1$gist, levels = c("nogist", "gist"))

# remove participants whose partisanship is "something else" or empty
df2 = df1[df1$party %in% c("Republican", "Democrat", "Independent"), ]


########
# check balance of frame treatment under party id
df2 %>% group_by(frame, party) %>% tally()

# recoding race and create new variable composed of 4 types 
df2$new_race = df2$race
df2$new_race[!df2$race %in% c("White",
                              "Black or African American",     
                              "Asian/Pacific Islander") ] = "Multi-racial or Other"
df2$new_race = factor(df2$new_race, 
                      levels = c("White",
                                 "Black or African American",    
                                 "Asian/Pacific Islander", 
                                 "Multi-racial or Other"))
table(df2$new_race)

# Chisq test on demographic factors
chisq.test(table(df2$age[df2$party == 'Republican'], df2$frame[df2$party == 'Republican']))
chisq.test(table(df2$race[df2$party == 'Republican'], df2$frame[df2$party == 'Republican']))
chisq.test(table(df2$new_race[df2$party == 'Republican'], df2$frame[df2$party == 'Republican']))
chisq.test(table(df2$gender[df2$party == 'Republican'], df2$frame[df2$party == 'Republican']))
chisq.test(table(df2$edu[df2$party == 'Republican'], df2$frame[df2$party == 'Republican']))
chisq.test(table(df2$hisp[df2$party == 'Republican'], df2$frame[df2$party == 'Republican']))
chisq.test(table(df2$ideology[df2$party == 'Republican'], df2$frame[df2$party == 'Republican']))


chisq.test(table(df2$age[df2$party == 'Democrat'], df2$frame[df2$party == 'Democrat']))
chisq.test(table(df2$race[df2$party == 'Democrat'], df2$frame[df2$party == 'Democrat']))
chisq.test(table(df2$new_race[df2$party == 'Democrat'], df2$frame[df2$party == 'Democrat']))
chisq.test(table(df2$gender[df2$party == 'Democrat'], df2$frame[df2$party == 'Democrat']))
chisq.test(table(df2$edu[df2$party == 'Democrat'], df2$frame[df2$party == 'Democrat']))
chisq.test(table(df2$hisp[df2$party == 'Democrat'], df2$frame[df2$party == 'Democrat']))
chisq.test(table(df2$ideology[df2$party == 'Democrat'], df2$frame[df2$party == 'Democrat']))

# histograms of demographic factors and ks test
# age

df2 %>% group_by(frame, party) %>% summarise(mean(age))
check_age = df2 %>% group_by(frame, party, age) %>% 
  tally() %>% 
  mutate(Frame = ifelse(frame == 'health', 
                                        'health-frame',
                                        'economy-frame')) %>%
  mutate(new_age = case_when(age == 1 ~ '18-24', 
                           age == 2 ~ '25-34', 
                           age == 3 ~ '35-44',
                           age == 4 ~ '45-54',
                           age == 5 ~ '55-64',
                           age == 6 ~ '65+',
                           age == 7 ~ '65+')) 

check_age %>%
  ggplot(aes(x = new_age, y=n, fill=Frame)) +
  geom_bar(stat ='identity', position = position_dodge()) +
  theme_bw() + scale_fill_manual(values = c('gray', 'black')) +
  labs(x = "Age" , y= "Count") + 
  facet_wrap(~party)


                            
ks.test(check_age$n[(check_age$frame == 'health') &  (check_gender$party == 'Republican')],
        check_age$n[(check_age$frame == 'econ' )&  (check_gender$party == 'Republican')] )

ks.test(check_age$n[(check_age$frame == 'health') &  (check_gender$party == 'Democrat')],
        check_age$n[(check_age$frame == 'econ' )&  (check_gender$party == 'Democrat')] )




# other demographic variables under treatments
# gender
check_gender = df2 %>% group_by(frame, party, gender) %>% 
  tally() %>%
  mutate(Frame = ifelse(frame == 'health', 
                        'health-frame',
                        'economy-frame')) 
check_gender %>%  
  ggplot(aes(x = gender, y=n, fill=Frame)) +
  geom_bar(stat ='identity', position = position_dodge()) +
  theme_bw() + scale_fill_manual(values = c('gray', 'black')) +
  labs(x = "Gender" , y= "Count") + 
  facet_wrap(~party)

ks.test(check_gender$n[(check_gender$frame == 'health') & (check_gender$party == 'Republican')],
        check_gender$n[(check_gender$frame == 'econ') & (check_gender$party == 'Republican')])

ks.test(check_gender$n[(check_gender$frame == 'health') & (check_gender$party == 'Democrat')],
        check_gender$n[(check_gender$frame == 'econ') & (check_gender$party == 'Democrat')])
## race
check_race = df2 %>% group_by(frame, party, new_race) %>% 
  tally() %>%
  mutate(Frame = ifelse(frame == 'health', 
                        'health-frame',
                        'economy-frame')) 
check_race %>%  
  ggplot(aes(x = new_race, y=n, fill=Frame)) +
  geom_bar(stat ='identity', position = position_dodge()) +
  theme_bw() + scale_fill_manual(values = c('gray', 'black')) +
  labs(x = "Race" , y= "Count") + 
  facet_wrap(~party) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) 

ks.test(check_race$n[(check_race$frame == 'health') & (check_race$party == 'Republican')],
        check_race$n[(check_race$frame == 'econ') & (check_race$party == 'Republican')])

ks.test(check_race$n[(check_race$frame == 'health') & (check_race$party == 'Democrat')],
        check_race$n[(check_race$frame == 'econ') & (check_race$party == 'Democrat')])

## hisp
check_hisp = df2 %>% group_by(frame, party, hisp) %>% 
  tally() %>%
  mutate(Frame = ifelse(frame == 'health', 
                        'health-frame',
                        'economy-frame')) %>%
  filter(hisp != "")

check_hisp %>%  
  ggplot(aes(x = hisp, y=n, fill=Frame)) +
  geom_bar(stat ='identity', position = position_dodge()) +
  theme_bw() + scale_fill_manual(values = c('gray', 'black')) +
  labs(x = "Hispanic" , y= "Count") + 
  facet_wrap(~party) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) 

ks.test(check_hisp$n[(check_hisp$frame == 'health') & (check_hisp$party == 'Republican')],
        check_hisp$n[(check_hisp$frame == 'econ') & (check_hisp$party == 'Republican')])

ks.test(check_hisp$n[(check_hisp$frame == 'health') & (check_hisp$party == 'Democrat')],
        check_hisp$n[(check_hisp$frame == 'econ') & (check_hisp$party == 'Democrat')])

## edu
check_edu = df2 %>% group_by(frame, party, edu) %>% 
  tally() %>%
  mutate(Frame = ifelse(frame == 'health', 
                        'health-frame',
                        'economy-frame')) %>%
  mutate(new_edu = case_when(edu == 1 ~ 'Did not graduate from high school', 
                             edu == 2 ~ 'High school diploma or the equivalent (GED)', 
                             edu == 3 ~ 'Some college',
                             edu == 4 ~ "Associate's degree",
                             edu == 5 ~ "Bachelor's degree",
                             edu == 6 ~ "Master's degree",
                             edu == 7 ~ "Professional or doctorate degree")) 
  

check_edu %>%  
  ggplot(aes(x = new_edu, y=n, fill=Frame)) +
  geom_bar(stat ='identity', position = position_dodge()) +
  theme_bw() + scale_fill_manual(values = c('gray', 'black')) +
  labs(x = "Education" , y= "Count") + 
  facet_wrap(~party) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) 

ks.test(check_edu$n[(check_edu$frame == 'health') & (check_edu$party == 'Republican')],
        check_edu$n[(check_edu$frame == 'econ') & (check_edu$party == 'Republican')])

ks.test(check_edu$n[(check_edu$frame == 'health') & (check_edu$party == 'Democrat')],
        check_edu$n[(check_edu$frame == 'econ') & (check_edu$party == 'Democrat')])

## ideology
check_ideology = df2 %>% group_by(frame, party, ideology) %>% 
  tally() %>%
  mutate(Frame = ifelse(frame == 'health', 
                        'health-frame',
                        'economy-frame')) %>%
  mutate(new_ideology = case_when(ideology == 1 ~ 'Very conservative', 
                                  ideology == 2 ~ 'Conservative', 
                                  ideology == 3 ~ 'Slightly conservative',
                                  ideology == 4 ~ "Moderate;middle of the road",
                                  ideology == 5 ~ "Slightly liberal",
                                  ideology == 6 ~ "Liberal",
                                  ideology == 7 ~ "Very liberal")) 


check_ideology %>%  
  ggplot(aes(x = new_ideology, y=n, fill=Frame)) +
  geom_bar(stat ='identity', position = position_dodge()) +
  theme_bw() + scale_fill_manual(values = c('gray', 'black')) +
  labs(x = "Ideology" , y= "Count") + 
  facet_wrap(~party) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) 

ks.test(check_ideology$n[(check_ideology$frame == 'health') & (check_ideology$party == 'Republican')],
        check_ideology$n[(check_ideology$frame == 'econ') & (check_ideology$party == 'Republican')])

ks.test(check_ideology$n[(check_ideology$frame == 'health') & (check_ideology$party == 'Democrat')],
        check_ideology$n[(check_ideology$frame == 'econ') & (check_ideology$party == 'Democrat')])


############## cvm test distribution comparison  ############
# before treatment and after treatment, vax intention
cvm.test(df2$post_vax_intent,
         ecdf(df2$pre_vax_intent)) 

# pre-t and post-t under econ and health framing
cvm.test(df2[(df2$frame == "econ"), ]$post_vax_intent,
         ecdf(df2[(df2$frame == "econ"), ]$pre_vax_intent)) 

cvm.test(df2[(df2$frame == "health"), ]$post_vax_intent,
         ecdf(df2[(df2$frame == "health"), ]$pre_vax_intent)) 

# across partisans, frames comparison
# Indp
cvm.test(df2[(df2$frame == "health") & (df2$party== "Independent"), ]$post_vax_intent,
         stepfun(1:5, empirical_cdf(df2[(df2$frame == "health") & (df2$party== "Independent"), ]$pre_vax_intent, 
         ubounds=seq(0, 5, by=1.0))$CDF))

cvm.test(df2[(df2$frame == "econ") & (df2$party == "Independent"), ]$post_vax_intent,
         stepfun(1:5, empirical_cdf(df2[(df2$frame == "econ") & (df2$party== "Independent"), ]$pre_vax_intent, 
         ubounds=seq(0, 5, by=1.0))$CDF)) 

# Dem
cvm.test(df2[(df2$frame == "health") & (df2$party == "Democrat"), ]$post_vax_intent,
         ecdf(df2[(df2$frame == "health") & (df2$party == "Democrat"), ]$pre_vax_intent))

cvm.test(df2[(df2$frame == "econ") & (df2$party == "Democrat"), ]$post_vax_intent,
         ecdf(df2[(df2$frame == "econ") & (df2$party == "Democrat"), ]$pre_vax_intent))

# Rep
cvm.test(df2[(df2$frame == "health") & (df2$party== "Republican"), ]$post_vax_intent,
         ecdf(df2[(df2$frame == "health") & (df2$party== "Republican"), ]$pre_vax_intent))

cvm.test(df2[(df2$frame == "econ") & (df2$party== "Republican"), ]$post_vax_intent,
         stepfun(1:5, empirical_cdf(df2[(df2$frame == "econ") & (df2$party== "Republican"), ]$pre_vax_intent, 
         ubounds=seq(0, 5, by=1.0))$CDF))

############# ANOVA: pre-treatment vax intention and post-treatment vax intention
# focus on most vaccine-resistant republicans
dta = as.data.frame(df2 %>% filter(party == 'Republican', pre_vax_intent <=1))

# one-way ANOVA on frame
aov = aov(post_vax_intent ~ frame , data = dta)
summary(aov)
TukeyHSD(aov)

# rep effect size: eta squared
etaSquared(aov(post_vax_intent ~ frame, data = dta))

# two sample t-test on most resistant republicans
rep_test = t.test(post_vax_intent ~ frame, data = dta)

# two-way ANCOVA on frame and gist
# no univeral significant effects between gist and no_gist, but gist works better on econ, gist might backfire on health?
aov2 = aov(post_vax_intent ~ frame * gist , data = dta)
summary(aov2)
TukeyHSD(aov2)

# focus on most vaccine-resistant dems
dta_dem = as.data.frame(df2 %>% filter(party == 'Democrat', pre_vax_intent <=1))

# one-way ANOVA on frame
aov_dem = aov(post_vax_intent ~ frame , data = dta_dem)
summary(aov_dem)
TukeyHSD(aov_dem)

# dem effect size: eta squared
etaSquared(aov(post_vax_intent ~ frame, data = dta_dem))


# Bonferroni correction
pairwise.t.test(dta$post_vax_intent, dta$frame, p.adjust.method="bonferroni")
pairwise.t.test(dta_dem$post_vax_intent, dta_dem$frame, p.adjust.method="bonferroni")

############## visualization: figure ####################
df22 = melt(df2[, c('post_vax_intent', 'pre_vax_intent', 'party', 'frame')], 
            id = c('frame', 'party'))
colnames(df22) = c("frame", "party", "variable", "value")

to_string = as_labeller(c('econ' = "Economy Framing", 
                          'health' = "Health Framing", 
                          'Republican' = 'Republican', 
                          'Democrat' = 'Democrat'))

ann_text = data.frame(frame = c("econ", "econ", "health", "health"), 
                      party = c('Democrat', 'Republican', 'Democrat', 'Republican'),
                      variable = c('post_vax_intent',  'post_vax_intent', 'pre_vax_intent', 'pre_vax_intent'),
                      value = c(2,2,1,1))
# put p-value
ann_text$label = c('(P=0.07669)',  '(P=9.403e-15)','(P=0.007026)', '(P=0.00161)')

# only plot Republicans and Democrats
df222 = df22 %>% filter(party != 'Independent') 

# plot quantile lines 
# define quantiles first
q_lines = c(0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.85, 0.9)

df_lines = as.data.frame(df2 %>% 
                           group_by(party, frame) %>%  
                           summarise(enframe(quantile(post_vax_intent, q_lines), "quantile", "pt_vax"),
                                     enframe(quantile(pre_vax_intent, q_lines), "quantile", "pret_vax")) %>% 
                           filter(quantile == '90%'))

df_lines1  = df_lines %>% filter(party != 'Independent') 
df_lines1$xstart = c(NA, NA, 1.3, 1.4)
df_lines1$xend = c(NA, NA,  1.8, 2.5)
df_lines1$ystart = c(NA, NA, 1.5, 1.5)
df_lines1$yend = c(NA, NA,  1.5, 1.5)


df_lines1$party = factor(df_lines1$party)
df_lines1$frame = factor(df_lines1$frame)

df_lines2 = melt(df_lines1[, c('party', 'frame', 'pt_vax', 'pret_vax', 'xstart', 'xend', 'ystart', 'yend')], 
                 id = c("party", "frame", 'xstart', 'xend', 'ystart', 'yend'))

df_lines2$type = NA
df_lines2$type[df_lines2$variable == "pt_vax"] = "dashed"
df_lines2$type[df_lines2$variable == "pret_vax"] = "dotted"

ggplot(data = df222, aes(x = value, fill = variable)) + 
  geom_density(alpha = 0.5, aes(color = variable), bw='bcv', adjust=0.6) +  
  facet_grid(party ~ frame, labeller = to_string, as.table=F) + 
  theme_bw() +
  scale_fill_manual(values = c('black', 'grey'), 
                    labels = c('post-treatment \n vaccination intent', 
                               'pre-treatment \n vaccination intent'),
                    name = "Variable") +
  scale_color_manual(values = c('black', 'grey'), guide = FALSE) +
  labs(x = "Vaccination intent scale \n (1 - extremely unlikely, 5 - extremely likely)", 
       y = "Density") +
  geom_text(size = 3, data = ann_text, mapping = aes(x = 3.9, y = 1.4, label = label)) +
  geom_vline(data = df_lines2, 
             aes(xintercept = value, linetype = type), 
             color = 'red', 
             size = 0.5, 
             key_glyph = "path")  +
  scale_linetype_manual(values = c('dashed','dotted'), 
                        labels = c('post-treatment \n vaccination intent',  
                                   'pre-treatment \n vaccination intent'), 
                        name = "90% percentile line") 

geom_segment(data = df_lines2, aes(x = xstart, y = ystart, xend = xend, yend = yend), 
             arrow=arrow(length=unit(0.15,"cm")), size=0.3, inherit.aes = FALSE)  


# power analysis
# 90% quantile shift among Rep between health and econ frame power test
rep_ed = as.numeric(df_lines %>% filter(party == 'Republican', frame == 'econ') %>% pull(pt_vax) - 
                      df_lines %>% filter(party == 'Republican', frame == 'econ') %>% pull(pret_vax)) 
rep_hd = as.numeric(df_lines %>% filter(party == 'Republican', frame == 'health') %>% pull(pt_vax) - 
                      df_lines %>% filter(party == 'Republican', frame == 'health') %>% pull(pret_vax)) 

pwr.t.test(d = rep_ed - rep_hd, n = dim(dta)[1]/2, 
           sig.level = 0.01, 
           alternative = "greater",
           type = "two.sample")


# 90% quantile shift among Rep between health and econ frame power test
dem_ed = as.numeric(df_lines %>% filter(party == 'Democrat', frame == 'econ') %>% pull(pt_vax) - 
                      df_lines %>% filter(party == 'Democrat', frame == 'econ') %>% pull(pret_vax)) 
dem_hd = as.numeric(df_lines %>% filter(party == 'Democrat', frame == 'health') %>% pull(pt_vax) - 
                      df_lines %>% filter(party == 'Democrat', frame == 'health') %>% pull(pret_vax)) 

pwr.t.test(d = dem_ed - dem_hd, 
           n = as.numeric(df2 %>% group_by(party) %>% tally() %>% filter(party == "Democrat") %>% pull(n))/2, 
           sig.level = 0.01, 
           alternative = "less",
           type = "two.sample")



pwr.p.test(h = 0.1, 
           n = as.numeric(df2 %>% group_by(party, frame) %>% tally() %>% filter(party == "Democrat", frame=="econ") %>% pull(n)), 
           sig.level = 0.05, 
           alternative = "greater")

pwr.p.test(h=0.4,power=0.85,sig.level=0.05,alternative="two.sided")

############## mediation analysis ##################
# ANOVA on risk
summary(aov(new_risk_perfinan ~ party * frame, data = df2))

risk_test = t.test(df2[df2$frame == "econ", ]$new_risk_perfinan, 
                   df2[df2$frame == "health", ]$new_risk_perfinan)

# power test
pwr.t2n.test(d = risk_test$estimate[1] - risk_test$estimate[2],
           n1 = 149, n2 = 150, sig.level = 0.05, alternative = "greater")


# causal mediation anlysis with bootstrap
fitM = lm(new_risk_perfinan ~ frame, data = df2)
summary(fitM)
fitY = lm(post_vax_intent ~ frame + new_risk_perfinan, data = df2)
summary(fitY) 

set.seed(1)
fitMedBoot = mediate(fitM, fitY, boot=TRUE, sims=500, treat="frame", mediator="new_risk_perfinan")
summary(fitMedBoot)


